Sys.setenv(`_R_S3_METHOD_REGISTRATION_NOTE_OVERWRITES_` = "false")

suppressPackageStartupMessages({
  library(here)
  library(tidyverse)
})

# Load relevant data
source(here("3_analysis", "data", "get_lfc_dfs.R"))
load(here("3_analysis", "data", "lfc_dfs.RData"))
source(here("3_analysis", "data", "get_enrichr_over.R"))
load(here("3_analysis", "data", "enrichr_over.RData"))

# knit options
options(knitr.duplicate.label = "allow")

Intro

We will analyse bulk RNA-Seq data (GSE137001) referenced by Velychko et al. (2019)1, following these rough steps:

  • Reprocess the raw data (PRJNA564252) using sra-tools, salmon, Snakemake and tximport
    • See the 2_reprocess directory for details on the workflow
  • Using MEFs as the base reference, perform DEG analysis with DESeq2 on every other timepoint
  • Select significant over-expressed genes and pass them to the Enrichr browser tool
  • Plot and tabulate DEGs (volcano plot and data table)

Pathway Heatmaps (Enrichr Scores)

For each timepoint pair, we sent significant DEGs to Enrichr. A DEG is significant if it’s adjusted p-value is less than 0.01, and absolute log2 fold chance is greater than 0.5.

# Extract metrics from enrichr results
# GO:0036500 is commented out as it didn't show up in the enrichr results for all timepoints
# pathways <- c("GO:0009267", "GO:0034198", "GO:0036003", "GO:0036500", "GO:1990928", "GO:0034599", "GO:0034976", "GO:0016236", "GO:0010506")
pathways <- c("GO:0009267", "GO:0034198", "GO:0036003", "GO:1990928", "GO:0034599", "GO:0034976", "GO:0016236", "GO:0010506")

odds_ratios <- lapply(enrichr_over, function(res) {
  or <- res[["GO_Biological_Process_2021"]] %>%
    filter(str_detect(Term, paste(pathways, collapse="|"))) %>%
    arrange(Term) %>%
    pull(Odds.Ratio)
  
  return(or)
})
odds_ratios <- as.data.frame(odds_ratios)

neglog10_pvals <- lapply(enrichr_over, function(res) {
  pval <- res[["GO_Biological_Process_2021"]] %>%
    filter(str_detect(Term, paste(pathways, collapse="|"))) %>%
    arrange(Term) %>%
    mutate(neglog10 = -log10(Adjusted.P.value)) %>% 
    pull(neglog10)
  
  return(pval)
})
neglog10_pvals <- as.data.frame(neglog10_pvals)

combined_scores <- lapply(enrichr_over, function(res) {
  c_score <- res[["GO_Biological_Process_2021"]] %>%
    filter(str_detect(Term, paste(pathways, collapse="|"))) %>%
    arrange(Term) %>%
    pull(Combined.Score)
  
  return(c_score)
})
combined_scores <- as.data.frame(combined_scores)

row.names(odds_ratios) <- enrichr_over[[1]][["GO_Biological_Process_2021"]] %>%
  filter(str_detect(Term, paste(pathways, collapse="|"))) %>%
  arrange(Term) %>%
  mutate(Term = str_wrap(Term, width = 60)) %>% 
  pull(Term)

row.names(neglog10_pvals) <- row.names(odds_ratios)
row.names(combined_scores) <- row.names(odds_ratios)

Odds Ratio

pheatmap::pheatmap(
  odds_ratios,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  scale = "row",
)

-log10(adjusted p-value)

pheatmap::pheatmap(
  neglog10_pvals,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  scale = "row",
)

Combined Score

pheatmap::pheatmap(
  combined_scores,
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  scale = "row",
)

Volcano Plots and DEG Tables

state_d2_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_d4_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_d6_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_d9_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_d12_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_iPSC_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

state_ESC_vs_MEF

res <- lfc_dfs[[x]]
padj_cutoff <- 0.01
lfc_cutoff <- 0.5

EnhancedVolcano::EnhancedVolcano(
  res,
  lab = res$SYMBOL,
  x = "log2FoldChange",
  y = "padj",
  pCutoff = padj_cutoff,
  FCcutoff = lfc_cutoff,
  subtitle = sprintf(
    "Adjusted p-value cutoff = %.2f\n|Log2FoldChange| cutoff = %.1f",
    padj_cutoff, lfc_cutoff
  )
)

res %>%
  select(SYMBOL, log2FoldChange, padj) %>%
  filter(padj <= padj_cutoff & abs(log2FoldChange) >= lfc_cutoff) %>%
  arrange(padj) %>% 
  DT::datatable() %>%
  DT::formatSignif(columns = 2:3, digits = 3)

References

1.
Velychko S, Adachi K, Kim KP, et al. Excluding Oct4 from yamanaka cocktail unleashes the developmental potential of iPSCs. Cell Stem Cell. 2019;25(6):737-753.e4. doi:10.1016/j.stem.2019.10.002